import numpy as np
from .space import Space
自定義類別參數
def __init__(self, low, high, shape=None, dtype=np.float32):
assert dtype is not None, 'dtype must be explicitly provided. '
self.dtype = np.dtype(dtype)
if shape is None:
assert low.shape == high.shape, 'box dimension mismatch. '
self.shape = low.shape
self.low = low
self.high = high
else:
assert np.isscalar(low) and np.isscalar(high), 'box requires scalar bounds. '
self.shape = tuple(shape)
self.low = np.full(self.shape, low)
self.high = np.full(self.shape, high)
self.low = self.low.astype(self.dtype)
self.high = self.high.astype(self.dtype)
# Boolean arrays which indicate the interval type for each coordinate
self.bounded_below = -np.inf < self.low
self.bounded_above = np.inf > self.high
super(Box, self).__init__(self.shape, self.dtype)
定義邊界
def is_bounded(self, manner="both"):
below = np.all(self.bounded_below)
above = np.all(self.bounded_above)
if manner == "both":
return below and above
elif manner == "below":
return below
elif manner == "above":
return above
else:
raise ValueError("manner is not in {'below', 'above', 'both'}")
定義採樣過程
def sample(self):
"""
Generates a single random sample inside of the Box.
In creating a sample of the box, each coordinate is sampled according to
the form of the interval:
* [a, b] : uniform distribution
* [a, oo) : shifted exponential distribution
* (-oo, b] : shifted negative exponential distribution
* (-oo, oo) : normal distribution
"""
high = self.high if self.dtype.kind == 'f' \
else self.high.astype('int64') + 1
sample = np.empty(self.shape)
# Masking arrays which classify the coordinates according to interval
# type
unbounded = ~self.bounded_below & ~self.bounded_above
upp_bounded = ~self.bounded_below & self.bounded_above
low_bounded = self.bounded_below & ~self.bounded_above
bounded = self.bounded_below & self.bounded_above
# Vectorized sampling by interval type
sample[unbounded] = self.np_random.normal(
size=unbounded[unbounded].shape)
sample[low_bounded] = self.np_random.exponential(
size=low_bounded[low_bounded].shape) + self.low[low_bounded]
sample[upp_bounded] = -self.np_random.exponential(
size=upp_bounded[upp_bounded].shape) - self.high[upp_bounded]
sample[bounded] = self.np_random.uniform(low=self.low[bounded],
high=high[bounded],
size=bounded[bounded].shape)
return sample.astype(self.dtype)
OpenAI gym 源碼:https://bre.is/2tD4gj5x